def sort_by_tfidf(partition, topk=20):
    """Yield the highest-weighted features of every row in ``partition``.

    Each row must expose an ``id`` attribute and an ``idfFeatures``
    attribute that carries two parallel sequences, ``indices`` and
    ``values``.
    NOTE(review): this looks like a Spark ``mapPartitions`` callback over
    rows holding a sparse TF-IDF vector -- confirm against the caller.

    Args:
        partition: Iterable of row objects as described above.
        topk: Maximum number of features emitted per row. Defaults to 20,
            the cutoff that was previously hard-coded.

    Yields:
        ``(row.id, index, weight)`` tuples, heaviest weight first within a
        row, with ``index`` cast to ``int`` and ``weight`` cast to ``float``
        and rounded to 4 decimal places.
    """
    # Imported locally so the function is self-contained wherever it runs.
    import heapq

    for row in partition:
        features = row.idfFeatures
        # Pair every feature index with its weight and keep only the
        # ``topk`` heaviest. ``heapq.nlargest`` is documented as equivalent
        # to ``sorted(..., reverse=True)[:topk]`` (ties keep their original
        # order) without fully sorting the whole vector.
        best = heapq.nlargest(
            topk,
            zip(features.indices, features.values),
            key=lambda pair: pair[1],
        )
        for word_index, tfidf in best:
            # Casting to built-in int/float keeps the output plain Python
            # values regardless of the element types of the input sequences.
            yield row.id, int(word_index), round(float(tfidf), 4)
